
******************************************************
* 				Replication do-file                  *
*         Review of Economics and Statistics         *
*                                                    *
*               Choosing Your Pond                   *
*       Location Choices and Relative Income         *
*                                                    *
*		Nicolas Bottan & Ricardo Perez-Truglia       *
*                                                    *
*                   September 2020                   *
*                                                    *
******************************************************
/*

                    Notes
					
This do-file prepares data from the MTurk Experiment. 

Subject PII (IP addresses, mturk codes, geographic information) was
removed to protect anonimity of subjects.

*** Install Stata commands
cap: ssc install renames
cap: ssc install ivreg2
cap: ssc install ranktest 
cap: ssc install esttab
cap: ssc install outreg2
cap: ssc install parmest
cap: ssc install qqvalue
cap: ssc install quantiles
*/


*** Set WD
cd ""		// Set path for main folder here


use "Mturk Experiment/mturk_raw_anonymized.dta", clear

keep if finished=="True"

* Choice set
rename qid49_1 state1
rename qid49_2 metro1
rename qid52 knows1

rename qid50_1 state2
rename qid50_2 metro2
rename qid53 knows2

destring knows1, replace
destring knows2, replace

gen rel_knows=knows1-knows2

gen wage1=54000
gen wage2=54000

* Other variables
gen male=qid4!="Female"
drop qid4

replace qid5_1="80" if qid5_1=="Over 80"
destring qid5_1, replace
rename qid5_1 age

encode qid8, gen(maritalstat)
encode q88, gen(nkids)
replace nkids=0 if nkids==5
gen single=marital==3 if marital!=.

gen attnchk=q173=="None of the above"


*** Prepare beliefs

* Rel. Prices
destring qid67, gen(pre_px1) i("%")
replace pre_px1 = (100 - pre_px1) if q66=="cheaper"
replace pre_px1 = (100 + pre_px1) if q66=="more expensive"

destring qid69, gen(pre_px2) i("%")
replace pre_px2 = (100 - pre_px2) if q67=="cheaper"
replace pre_px2 = (100 + pre_px2) if q67=="more expensive"

destring qid72, gen(post_px1) i("%")
replace post_px1 = (100 - post_px1) if q69=="cheaper"
replace post_px1 = (100 + post_px1) if q69=="more expensive"

destring qid73, gen(post_px2) i("%")
replace post_px2 = (100 - post_px2) if q70=="cheaper"
replace post_px2 = (100 + post_px2) if q70=="more expensive"


gen pre_relpx = (pre_px1-pre_px2)/100
gen post_relpx = (post_px1-post_px2)/100


* Rel. Income
destring qid62, gen(pre_inc1) i("Richer than " "% of individual earners")
destring qid63, gen(pre_inc2) i("Richer than " "% of individual earners")

destring qid75, gen(post_inc1) i("Richer than " "% of individual earners")
destring qid76, gen(post_inc2) i("Richer than " "% of individual earners")

gen pre_relinc = (pre_inc1-pre_inc2)/100
gen post_relinc = (post_inc1-post_inc2)/100


* Rankings
encode qid77, gen(rankaux)
gen rank=6 if rankaux==5
replace rank=5 if rankaux==3
replace rank=4 if rankaux==1
replace rank=3 if rankaux==2
replace rank=2 if rankaux==4
replace rank=1 if rankaux==6
drop rankaux

gen baseline_finalrank=rank>=4 if rank!=.



*** Prepare treatment 

preserve

use "Ancillary Data/program_metro_costs.dta", clear
bys cbsa: keep if _n==1
rename msa_name msa
tempfile col
save "`col'", replace

use "Ancillary Data/program_metro_income.dta", clear
bys cbsa: keep if _n==1
rename msa_name msa
merge 1:1 msa using "`col'"
keep if _mer==3
drop _mer
save "`col'", replace

restore

rename metro1 msa
merge m:1 msa using "`col'"
keep if _mer==3
drop _mer

foreach var in mu_cps mu_acs sig_cps sig_acs colhat rpphat rppimp colimp {
	rename `var' `var'1
	}
rename msa metro1

rename metro2 msa
merge m:1 msa using "`col'"
keep if _mer==3
drop _mer

foreach var in mu_cps mu_acs sig_cps sig_acs colhat rpphat rppimp colimp {
	rename `var' `var'2
	}
rename msa metro2

forvalues i=1/2 {
	gen inccps`i' = round(normal((log(wage`i')-mu_cps`i')/sig_cps`i') * 100,.1)
	gen incacs`i' = round(normal((log(wage`i')-mu_acs`i')/sig_acs`i') * 100,.1)
	}

gen relpx_shown = (colhat1-colhat2)/100 if sourcecol=="colhat"
replace relpx_shown = (rpphat1-rpphat2)/100 if sourcecol=="rpphat"
gen relpx_cfact = (colhat1-colhat2)/100 if sourcecol=="rpphat"
replace relpx_cfact = (rpphat1-rpphat2)/100 if sourcecol=="colhat"

gen relinc_shown = (inccps1-inccps2)/100 if sourceinc=="cps"
replace relinc_shown = (incacs1-incacs2)/100 if sourceinc=="acs"

gen relinc_cfact = (inccps1-inccps2)/100 if sourceinc=="acs"
replace relinc_cfact = (incacs1-incacs2)/100 if sourceinc=="cps"

gen relpx_diff = relpx_shown - relpx_cfact 
gen relinc_diff = relinc_shown - relinc_cfact

drop mu_cps1 mu_acs1 sig_cps1 sig_acs1 colhat1 colimp1 rpphat1 rppimp1 mu_cps2 mu_acs2 sig_cps2 sig_acs2 colhat2 colimp2 rpphat2 rppimp2 inccps1 incacs1 incacs1 inccps2 incacs1 inccps2 incacs2

* Drop if select metro too fast
destring q92_pagesubmit, replace
drop if q92_pagesubmit<11

* Drop if survey java failed and same city selected
drop if metro1==metro2



*** Prepare Perceptions Data
* Schools
local var "schools"
local r1="better"
local r2="worse"
local a1=102
local b1=103
local a2=105
local b2=107

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q102 q103 q105 q107

* Crime
local var "crime"
local r1="higher"
local r2="lower"
local a1=148
local b1=149
local a2=151
local b2=152

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"

* Healthcare
local var "health"
local r1="better"
local r2="worse"
local a1=154
local b1=155
local a2=157
local b2=158

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q154 q155 q157 q158

* Public spaces
local var "pubspace"
local r1="better"
local r2="worse"
local a1=160
local b1=161
local a2=163
local b2=164

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q160 q161 q163 q164

* Environment
local var "environment"
local r1="better"
local r2="worse"
local a1=166
local b1=167
local a2=169
local b2=170

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q166 q167 q169 q170

* Entertainment
local var "entertain"
local r1="better"
local r2="worse"
local a1=172
local b1=200
local a2=175
local b2=176

destring v`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q172 v200 q175 q176

* College
local var "college"
local r1="higher"
local r2="lower"
local a1=178
local b1=179
local a2=181
local b2=182

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q178 q179 q181 q182

* Trump
local var "trump"
local r1="higher"
local r2="lower"
local a1=184
local b1=185
local a2=187
local b2=188

destring q`b1', gen(`var'1) i("%")
replace `var'1 = (100 - `var'1) if q`a1'=="`r1'"
replace `var'1 = (100 + `var'1) if q`a1'=="`r2'"

destring q`b2', gen(`var'2) i("%")
replace `var'2 = (100 - `var'2) if q`a2'=="`r1'"
replace `var'2 = (100 + `var'2) if q`a2'=="`r2'"
drop q184 q185 q187 q188

* Order
rename osch o_schools
rename ocri o_crime
rename ohea o_health
rename opub o_pubspace
rename oenv o_environment
rename oent o_entertain
rename ocol o_college
rename otru o_trump

foreach var in schools crime health pubspace environment entertain college trump {
	gen rel_`var' = (`var'1-`var'2)/100
	drop o_`var'
	}





*** Merge additional controls for Metro Characteristics

cap: drop msa
gen msa=metro1
merge m:1 msa using "Ancillary Data/auxilliarydata.dta"
drop if _mer==2
drop _merge msa
foreach var in pop hhsize hhinc pcinc houses agrent gini pcturban aland awater ltotpop popdens pctmale race_white race_black hispanic edu_lths edu_hs edu_scol highered highered_m highered_f pct200k house_owner house1mil poverty foreign nohealth age2534_m age2534_f qol county_sharepop_dem total_expenditure total_educ_total_exp higher_ed_total_exp fire_prot_total_expend health_total_expend total_hospital_total_exp total_highways_tot_exp transit_sub_total_exp libraries_total_expend police_prot_total_exp ronswanson county_crime_rate_all county_crime_rate_violent county_irs_cont_pc {
	rename `var' `var'1
	}
gen msa=metro2
merge m:1 msa using "Ancillary Data/auxilliarydata.dta"
drop if _mer==2
drop _merge msa
foreach var in pop hhsize hhinc pcinc houses agrent gini pcturban aland awater ltotpop popdens pctmale race_white race_black hispanic edu_lths edu_hs edu_scol highered highered_m highered_f pct200k house_owner house1mil poverty foreign nohealth age2534_m age2534_f qol county_sharepop_dem total_expenditure total_educ_total_exp higher_ed_total_exp fire_prot_total_expend health_total_expend total_hospital_total_exp total_highways_tot_exp transit_sub_total_exp libraries_total_expend police_prot_total_exp ronswanson county_crime_rate_all county_crime_rate_violent county_irs_cont_pc { 
	rename `var' `var'2
	}


* Relative characteristics	
gen rel_qol=qol1-qol2

gen lnpop = log(pop1/pop2)
gen relwhite=race_white1/race_white2
gen relblack=race_black1/race_black2
gen relhispanic=hispanic1/hispanic2
gen reldens=popdens1/popdens2
gen relforeign=foreign1/foreign2

gen relrent=agrent1/agrent2
gen relhed=highered_f1/highered_f2 if male==1
replace relhed=highered_m1/highered_m2 if male==0

replace pctmale1=1-pctmale1 if male==1
replace pctmale2=1-pctmale2 if male==1
gen relgender=pctmale1/pctmale2
gen relgini=gini1/gini2

gen relpct200k=pct200k1/pct200k2
gen relurban=pcturban1/pcturban2
gen reldateage=age2534_f1/age2534_f2 if male==1
replace  reldateage=age2534_m1/age2534_m2 if male==0

gen lnrelcrime=log((county_crime_rate_all1+.001)/(county_crime_rate_all2+.001))
gen lnrelviolcrime=log((county_crime_rate_violent1+.001)/(county_crime_rate_violent2+.001))
gen lnrelcont=log(county_irs_cont_pc1/county_irs_cont_pc2)
gen reldemocrat=county_sharepop_dem1-county_sharepop_dem2
gen pubgood_total = log(total_expenditure1/total_expenditure2)
gen pubgood_educ = log(total_educ_total_exp1/total_educ_total_exp2)
gen pubgood_fire = log(fire_prot_total_expend1/fire_prot_total_expend2)
gen pubgood_health = log(health_total_expend1/health_total_expend2)
gen pubgood_hosp = log(total_hospital_total_exp1/total_hospital_total_exp2)
gen pubgood_library = log( libraries_total_expend1/libraries_total_expend2)
gen pubgood_police = log(police_prot_total_exp1/police_prot_total_exp2)
gen pubgood_parkrecs = log(ronswanson1/ronswanson2)

drop hhsize* hhinc* pcinc* houses* agrent* gini* pcturban* aland* awater* ltotpop* popdens* pctmale* race_white* race_black* hispanic* edu_lths* edu_hs* edu_scol* highered* highered_m* highered_f* pct200k* house_owner* house1mil* poverty* foreign* nohealth* age2534_m* age2534_f* qol* county_sharepop_dem* total_expenditure* total_educ_total_exp* higher_ed_total_exp* fire_prot_total_expend* health_total_expend* total_hospital_total_exp* total_highways_tot_exp* transit_sub_total_exp* libraries_total_expend* police_prot_total_exp* ronswanson* county_crime_rate* county_irs_cont_pc*


drop startdate-userlanguage qid*
drop *_firstclick *_lastclick *_pagesubmit *_clickcount
drop q63 q88 q173 q66 q67 q69 q70 q148 q149 q151 q152 N loc count ffbkcol ffbkincrk trat wage1 wage2

order id age male nkids single baseline_final knows2 rel_knows rel_qol lnpop reldens relblack reldemocrat relurban rel_schools rel_crime rel_health rel_pubspace rel_environment rel_entertain rel_college rel_trump


*** Label variables
label variable id "Subject ID"
label variable age "Age"
label variable male "Male (=1)"
label variable nkids "Nr children"
label variable single "Single (=1)"
label variable baseline_fina~k "Prefers city #1 (=1)"
label variable knows2 "Score familiar with city #2"
label variable rel_knows "Relative familiarity score"
label variable rel_qol "Relative QOL [Albouy]"
label variable lnpop "Log ratio population [ACS 2010-2014 msa]"
label variable reldens "Ratio population density [ACS 2010-2014 msa]"
label variable relblack "Ratio black [ACS 2010-2014 msa]"
label variable reldemocrat "Relative democrat share"
label variable relurban "Ratio Share Urban population [ACS 2010-2014 msa]"
label variable rel_schools "Relative belief schools"
label variable rel_crime "Relative belief crime"
label variable rel_health "Relative belief health"
label variable rel_pubspace "Relative belief public spaces"
label variable rel_environment "Relative belief environment"
label variable rel_entertain "Relative belief entertainment"
label variable rel_college "Relative belief college"
label variable rel_trump "Relative belief trump votes"
label variable state1 "Chosen State #1"
label variable metro1 "Chosen City #1"
label variable knows1 "Score familiar with city #1"
label variable state2 "Chosen State #2"
label variable metro2 "Chosen City #2"
label variable pre_px1 "Prior belief COL city #1"
label variable pre_px2 "Prior belief COL city #2"
label variable pre_inc1 "Prior belief ER city #1"
label variable pre_inc2 "Prior belief ER city #2"
label variable post_px1 "Posterior belief COL city #1"
label variable post_px2 "Posterior belief COL city #2"
label variable post_inc1 "Posterior belief ER city #1"
label variable post_inc2 "Posterior belief ER city #2"
label variable schools1 "Belief Schools city #1"
label variable schools2 "Belief Schools city #2"
label variable crime1 "Belief Crime city #1"
label variable crime2 "Belief Crime city #2"
label variable health1 "Belief health city #1"
label variable health2 "Belief health city #2"
label variable pubspace1 "Belief public space city #1"
label variable pubspace2 "Belief public space city #2"
label variable environment1 "Belief environment city #1"
label variable environment2 "Belief environment city #2"
label variable entertain1 "Belief entertainment city #1"
label variable entertain2 "Belief entertainment city #2"
label variable college1 "Belief college city #1"
label variable college2 "Belief college city #2"
label variable trump1 "Belief trump city #1"
label variable trump2 "Belief trump city #2"
label variable sourcecol "Assigned source: COL"
label variable sourceinc "Assigned source: ER"
label variable valuecol1 "Assigned value COL city #1"
label variable valuecol2 "Assigned value COL city #2"
label variable valueinc1 "Assigned value ER city #1"
label variable valueinc2 "Assigned value ER city #2"
label variable maritalstat "Marital Status"
label variable attnchk "Passes attention check (=1)"
label variable pre_relpx "Relative COL, Prior"
label variable post_relpx "Relative COL, Posterior"
label variable pre_relinc "Relative ER, Prior"
label variable post_relinc "Relative ER, Posterior"
label variable rank "Likert score location choice (>4 favors city #1)"
label variable relpx_shown "Relative COL, Shown"
label variable relpx_cfact "Relative COL, Alternative"
label variable relinc_shown "Relative ER, Shown"
label variable relinc_cfact "Relative ER, Alternative"
label variable relpx_diff "Relative COL, Shown-Alternative"
label variable relinc_diff "Relative ER, Shown-Alternative"
label variable pop1 "Total Population city #1 [ACS 2010-2014 msa]"
label variable pop2 "Total Population city #2 [ACS 2010-2014 msa]"
label variable relwhite "Ratio Share White [ACS 2010-2014 msa]"
label variable relhispanic "Ratio Share Hispanic [ACS 2010-2014 msa]"
label variable relforeign "Ratio Share Foreign [ACS 2010-2014 msa]"
label variable relrent "Ratio Share Rent [ACS 2010-2014 msa]"
label variable relhed "Ratio Share higher ed [ACS 2010-2014 msa]"
label variable relgender "Ratio Share male [ACS 2010-2014 msa]"
label variable relgini "Ratio ginis [ACS 2010-2014 msa]"
label variable relpct200k "Ratio Share income >200k [ACS 2010-2014 msa]"
label variable reldateage "Ratio Pop aged 25-34 [ACS 2010-2014 msa]"
label variable lnrelcrime "Log Ratio Crime rate [UCR]"
label variable lnrelviolcrime "Log Ratio Violent Crime rate [UCR]"
label variable lnrelcont "Log Ratio Charitable Contributions per capita [IRS SOI]"
label variable pubgood_total "Log Ratio total local expenditures [Census of Govts]"
label variable pubgood_educ "Log Ratio total local education expenditure [Census of Govts]"
label variable pubgood_fire "Log Ratio total local fire expenditures [Census of Govts]"
label variable pubgood_health "Log Ratio total local health expenditures [Census of Govts]"
label variable pubgood_hosp "Log Ratio total local hospital expenditures [Census of Govts]"
label variable pubgood_library "Log Ratio total local library expenditures [Census of Govts]"
label variable pubgood_police "Log Ratio total local police expenditures [Census of Govts]"
label variable pubgood_parkr~s "Log Ratio total local parks expenditures [Census of Govts]"


compress
save "Mturk Experiment/mturk_experiment_clean.dta", replace




